library(stringi)
library(Peptides)
library(bio3d)
# Directory that holds the simulation batch. The PDB files are located through
# the `path` argument instead of setwd(): inside a notebook chunk the working
# directory is reset once the chunk finishes, and every later read/write in
# this script uses an absolute path anyway.
batch_dir <- "D:/Batches_Sim/Batch_3"

# Recursively collect every file whose name ends in "EKB1m.pdb". The dot is
# escaped so that it matches a literal "." rather than any character.
files <- list.files(
  path = batch_dir,
  pattern = "EKB1m\\.pdb$",
  recursive = TRUE
)

# Fail early with a clear message: pasting a prefix onto an empty vector would
# otherwise yield the bare directory path and a confusing error in read.pdb().
if (length(files) == 0) {
  stop("No files ending in 'EKB1m.pdb' found under ", batch_dir, call. = FALSE)
}

# Sort the relative paths by length first, then alphabetically.
# NOTE(review): presumably this makes numbered entries sort as 2 before 10 and
# keeps the order aligned with the rows of the Excel sheet - confirm.
files <- files[order(nchar(files), files)]

# Turn the relative paths into absolute ones ("D:/Batches_Sim/Batch_3/<file>").
files <- file.path(batch_dir, files)
# Read every PDB file listed in `files` and extract its sequence with
# pdbseq(..., aa1 = TRUE). The result is a list with exactly one entry per
# file, in the same order as `files`.
#
# lapply() is used instead of appending inside a for loop: it does not leave
# loop variables named `seq` / `file` (which mask base functions) in the
# global environment, and it keeps a slot for every file even if a call were
# to return NULL, so later rows cannot silently shift against `files`.
sequences <- lapply(files, function(pdb_path) {
  structure <- read.pdb(pdb_path)
  pdbseq(structure, inds = NULL, aa1 = TRUE)
})

# Report how many sequences were read.
print(length(sequences))
[1] 312
# Apply aaDescriptors() to every entry of `sequences`; the results are kept in
# a list, in the same order as the input sequences.
Scales_variables <- lapply(
  sequences,
  function(residues) aaDescriptors(residues)
)
library(readxl)
library(writexl)
# Spreadsheet holding the existing dataset.
dataset_path <- "C:\\Users\\venan\\OneDrive\\Desktop\\Work\\Scripts_for_Nick\\ML\\Dataset_Nick_heading.xlsx"

# Load the sheet and convert it to a base data frame.
exceldata <- read_excel(dataset_path)
dfdata <- data.frame(exceldata)

# Combine all per-sequence descriptor results side by side in one data frame.
# NOTE(review): this presumably requires every element of Scales_variables to
# have the same number of rows - confirm.
df_scales <- as.data.frame(Scales_variables)

# Display the combined table.
df_scales
# Column labels of df_scales, stored as an unnamed list with one string per
# column.
col_names <- as.list(names(df_scales))

# Sum every column of df_scales; the totals are kept as a one-column data
# frame with one row per column of df_scales.
col_sum <- data.frame(colSums(df_scales))

# Labels of the first 66 columns, reused later as the header of the summary
# table.
new_df_col_names <- col_names[seq_len(66)]
# Positions of the columns whose name contains "PP1"; each one is used as the
# first column of a block of consecutive descriptor columns.
indeces <- grep("PP1", colnames(df_scales))

# Number of consecutive column sums taken per block (start index plus the 65
# following entries).
block_width <- 66L

# Pull the column sums of each block as one numeric vector, then bind all
# vectors into a table in a single step (one row per block). This avoids
# growing a data frame with rbind() inside a loop, which copies the whole
# table on every iteration.
block_sums <- lapply(indeces, function(start) {
  col_sum[start:(start + block_width - 1L), ]
})
df <- as.data.frame(do.call(rbind, block_sums))

# Display the table of summed descriptors.
df
# Label the columns of the summed-descriptor table.
df <- setNames(df, new_df_col_names)

# Destination workbook for the merged table.
output_path <- "C:\\Users\\venan\\OneDrive\\Desktop\\Work\\Scripts_for_Nick\\ML\\new_df_66.xlsx"

# Append the descriptor columns to the original dataset and write the result
# to disk.
new_df <- cbind(dfdata, df)
write_xlsx(new_df, output_path)

# Display the merged table.
new_df
Error: object 'new_df' not found
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2sgelNjYWxlIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCmBgYHtyfQ0KbGlicmFyeShzdHJpbmdpKQ0KbGlicmFyeShQZXB0aWRlcykNCmxpYnJhcnkoYmlvM2QpDQpzZXR3ZCgiRDovQmF0Y2hlc19TaW0vQmF0Y2hfMyIpDQpmaWxlcyA8LSBsaXN0LmZpbGVzKHBhdHRlcm49IkVLQjFtLnBkYiQiLCByZWN1cnNpdmUgPSBUUlVFKQ0KZmlsZXMgPC0gZmlsZXNbb3JkZXIobmNoYXIoZmlsZXMpLCBmaWxlcyldDQpmaWxlcyA8LSBwYXN0ZTAoIkQ6L0JhdGNoZXNfU2ltL0JhdGNoXzMvIiwgZmlsZXMpDQoNCnNlcXVlbmNlcyA8LSBsaXN0KCkNCmZvciAoZmlsZSBpbiBmaWxlcyl7DQogIHBkYiA8LSByZWFkLnBkYihmaWxlKQ0KICBzZXEgPC0gcGRic2VxKHBkYiwgaW5kcyA9IE5VTEwsIGFhMSA9IFRSVUUpDQogIHNlcXVlbmNlc1tbbGVuZ3RoKHNlcXVlbmNlcykgKyAxIF1dIDwtIHNlcQ0KfQ0KYGBgDQpgYGB7cn0NCnByaW50KGxlbmd0aChzZXF1ZW5jZXMpKQ0KYGBgDQoNCmBgYHtyfQ0KU2NhbGVzX3ZhcmlhYmxlcyA8LSBsaXN0KCkNCmZvciAoaSBpbiBzZXF1ZW5jZXMpew0KICBuZXdfZWxlbWVudCA8LSAoYWFEZXNjcmlwdG9ycyhpKSkNCiAgU2NhbGVzX3ZhcmlhYmxlc1tbbGVuZ3RoKFNjYWxlc192YXJpYWJsZXMpICsgMV1dIDwtIG5ld19lbGVtZW50DQp9DQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KHJlYWR4bCkNCmxpYnJhcnkod3JpdGV4bCkNCmV4Y2VsZGF0YSA8LSByZWFkX2V4Y2VsKCJDOlxcVXNlcnNcXHZlbmFuXFxPbmVEcml2ZVxcRGVza3RvcFxcV29ya1xcU2NyaXB0c19mb3JfTmlja1xcTUxcXERhdGFzZXRfTmlja19oZWFkaW5nLnhsc3giKQ0KZGZkYXRhIDwtIGRhdGEuZnJhbWUoZXhjZWxkYXRhKQ0KZGZfc2NhbGVzIDwtIGFzLmRhdGEuZnJhbWUoU2NhbGVzX3ZhcmlhYmxlcykNCmRmX3NjYWxlcw0KYGBgDQoNCg0KYGBge3J9DQpjb2xfbmFtZXMgPC0gbGlzdCgpDQpmb3IgKGkgaW4gbmFtZXMoZGZfc2NhbGVzKSl7DQogIG5ld19uYW1lIDwtIGkNCiAgY29sX25hbWVzW1tsZW5ndGgoY29sX25hbWVzKSArIDFdXSA8LSBuZXdfbmFtZQ0KfQ0KY29sX3N1bSA8LSBkYXRhLmZyYW1lKGNvbFN1bXMoZGZfc2NhbGVzKSkNCm5ld19kZl9jb2xfbmFtZXMgPC0gY29sX25hbWVzWzE6NjZdDQoNCmBgYA0KDQpgYGB7cn0NCmluZGVjZXMgPC0gZ3JlcCgiUFAxIiwgY29sbmFtZXMoZGZfc2NhbGVzKSkNCmRmIDwtIGRhdGEuZnJhbWUoKQ0KZm9yIChpIGluIGluZGVjZXMpew0KICAgIGRmIDwtIHJiaW5kKGRmLCBjb2xfc3VtW2k6KGkrNjUpLF0pDQogIH0NCmRmDQpgYGANCmBgYHtyfQ0KbmFtZXMoZGYpIDwtIG5ld19kZl9jb2xfbmFtZXMNCmBgYA0KYGBge3J9DQpuZXdfZGYgPC0gY2JpbmQoZGZkYXRhLCBkZikNCndyaXRlX3hsc3gobmV3X2RmLCAiQzpcXFVzZXJzXFx2ZW5hblxcT25lRHJpdmVcXERlc2t0b3BcXFdvcmtcXFNjcmlwdHNfZm9yX05pY2tcXE1MXFxuZXdfZGZfNjYu
eGxzeCIpDQoNCmBgYA0KDQpgYGB7cn0NCm5ld19kZg0KYGBg